{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.cross_validation import KFold\n",
    "from sklearn.ensemble import ExtraTreesRegressor\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import mean_squared_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\n",
    "seed = 1024\n",
    "\n",
    "np.random.seed(seed)\n",
    "\n",
    "path = '../data/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\n",
    "train_x = pd.read_pickle(path + 'train_X.pkl')\n",
    "valid_x = pd.read_pickle(path + 'valid_X.pkl')\n",
    "dev_x = pd.read_pickle(path + 'dev_X.pkl')\n",
    "\n",
    "train_y = pd.read_pickle(path+'train.pkl')['label']\n",
    "valid_y = pd.read_pickle(path+'valid.pkl')['label']\n",
    "dev_y = pd.read_pickle(path+'dev.pkl')['label']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "            max_depth=8, max_features=0.5, max_leaf_nodes=None,\n",
       "            min_impurity_split=1e-07, min_samples_leaf=2,\n",
       "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "            n_estimators=250, n_jobs=16, oob_score=False,\n",
       "            random_state=None, verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rf_params = {\n",
    "    'n_jobs': 16,\n",
    "    'n_estimators': 250,\n",
    "    'max_features': 0.5,\n",
    "    'max_depth': 8,\n",
    "    'min_samples_leaf': 2,\n",
    "\n",
    "}\n",
    "\n",
    "\n",
    "clf = RandomForestClassifier(**rf_params)\n",
    "\n",
    "clf.fit(train_x,train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_pred = clf.predict(valid_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "randomforest model the accuracy on the valid set is : 77.43%\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "y_v = (y_pred+0.5).astype(int)\n",
    "acc =  accuracy_score(valid_y,y_v)\n",
    "\n",
    "print('randomforest model the accuracy on the valid set is : {}%'.format(round(acc* 100,2)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
